This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
This version of beta calculation is used for the theoretical method/results of the paper. For reference: the last update was Dec 2018 for an R notebook. This R notebook is an updated and more sophisticated version.
Constants used are listed at the start of this script.
setwd("D:/R program")
The working directory was changed to D:/R program inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
#values needed
K= 1.38064852*(10)^-23 #m2 kg/ s2 K boltzmann constant
mu= 1.126*(10)^-3 #kg/m s dynamic viscosity in 18C
v= 1.099*(10)^-6 #m2/s kinematic viscosity in 18C
Reh_calc= 2.3E-6 #in m radius Ehux
Reh_naked= 1.8E-6 #in m radius Ehux
Reh_lith = 1.25E-6 #in m radius
Rehv= 90*(10)^-9 #in m radius virus
Temp = 18+273.15 #temp in kelvin, here assuming 18C
Den_OcM = 1.05 #g/cm3 density organic cell matter
Den_CH2O= 1.025 #g/cm3 density seawater at 18C
hostnum <- (10)^3
virnum <- hostnum*10
lithnum <- hostnum*50
require (ggplot2)
Loading required package: ggplot2
package 㤼㸱ggplot2㤼㸲 was built under R version 3.5.3
require(plotly)
Loading required package: plotly
package 㤼㸱plotly㤼㸲 was built under R version 3.5.3
Attaching package: 㤼㸱plotly㤼㸲
The following object is masked from 㤼㸱package:ggplot2㤼㸲:
last_plot
The following object is masked from 㤼㸱package:stats㤼㸲:
filter
The following object is masked from 㤼㸱package:graphics㤼㸲:
layout
require(grid)
Loading required package: grid
require(ggthemes)
Loading required package: ggthemes
package 㤼㸱ggthemes㤼㸲 was built under R version 3.5.3
require (dplyr)
Loading required package: dplyr
package 㤼㸱dplyr㤼㸲 was built under R version 3.5.3
Attaching package: 㤼㸱dplyr㤼㸲
The following objects are masked from 㤼㸱package:stats㤼㸲:
filter, lag
The following objects are masked from 㤼㸱package:base㤼㸲:
intersect, setdiff, setequal, union
require(plyr)
Loading required package: plyr
----------------------------------------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
----------------------------------------------------------------------------------------------------------------
Attaching package: 㤼㸱plyr㤼㸲
The following objects are masked from 㤼㸱package:dplyr㤼㸲:
arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
The following objects are masked from 㤼㸱package:plotly㤼㸲:
arrange, mutate, rename, summarise
require(tidyverse)
Loading required package: tidyverse
package 㤼㸱tidyverse㤼㸲 was built under R version 3.5.3[37m-- [1mAttaching packages[22m --------------------------------------- tidyverse 1.2.1 --[39m
[37m[32mv[37m [34mtibble [37m 2.1.3 [32mv[37m [34mpurrr [37m 0.3.2
[32mv[37m [34mtidyr [37m 0.8.3 [32mv[37m [34mstringr[37m 1.4.0
[32mv[37m [34mreadr [37m 1.3.1 [32mv[37m [34mforcats[37m 0.4.0[39m
package 㤼㸱tibble㤼㸲 was built under R version 3.5.3package 㤼㸱tidyr㤼㸲 was built under R version 3.5.3package 㤼㸱readr㤼㸲 was built under R version 3.5.3package 㤼㸱purrr㤼㸲 was built under R version 3.5.3package 㤼㸱stringr㤼㸲 was built under R version 3.5.3package 㤼㸱forcats㤼㸲 was built under R version 3.5.3[37m-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[37m [34mplyr[37m::[32marrange()[37m masks [34mdplyr[37m::arrange(), [34mplotly[37m::arrange()
[31mx[37m [34mpurrr[37m::[32mcompact()[37m masks [34mplyr[37m::compact()
[31mx[37m [34mplyr[37m::[32mcount()[37m masks [34mdplyr[37m::count()
[31mx[37m [34mplyr[37m::[32mfailwith()[37m masks [34mdplyr[37m::failwith()
[31mx[37m [34mdplyr[37m::[32mfilter()[37m masks [34mplotly[37m::filter(), [34mstats[37m::filter()
[31mx[37m [34mplyr[37m::[32mid()[37m masks [34mdplyr[37m::id()
[31mx[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()
[31mx[37m [34mplyr[37m::[32mmutate()[37m masks [34mdplyr[37m::mutate(), [34mplotly[37m::mutate()
[31mx[37m [34mplyr[37m::[32mrename()[37m masks [34mdplyr[37m::rename(), [34mplotly[37m::rename()
[31mx[37m [34mplyr[37m::[32msummarise()[37m masks [34mdplyr[37m::summarise(), [34mplotly[37m::summarise()
[31mx[37m [34mplyr[37m::[32msummarize()[37m masks [34mdplyr[37m::summarize()[39m
source ("theme_Publication.R")
grid.newpage()
Calculating for Brownian motion
#Brownian motion (BM)
#1. make a data frame
BM <- data.frame (group=as.factor(c("Nc", "Cc", "Li")), rad= c(1.8E-6, 2.3E-6, 1.5E-6))
#2. calculate beta (beta)
BM$beta_BM <- ((2*(K*(10)^4)*Temp*(((BM$rad+Rehv)*100)^2))/((3*mu*10)*(BM$rad*Rehv*1e4)))*86400 #cm3/d
# go back to this later
#3. calculate encounters (E)
BM$E_BM <- BM$beta_BM*virnum
BM
Differential settling
#Differential settling (DS)
#1. read in PIC data
library(readr) #always use readr not baseR
setwd("D:/R program")
The working directory was changed to D:/R program inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
PIC <- read_csv("Postdoc-R/CSV Files/PIC.csv")
Parsed with column specification:
cols(
Strain = [31mcol_character()[39m,
Replicate = [32mcol_double()[39m,
TC = [32mcol_double()[39m,
AC = [32mcol_double()[39m,
Cellcount = [32mcol_double()[39m
)
PIC$Strain <- as.factor(PIC$Strain)
PIC$Replicate <- as.factor(PIC$Replicate)
#2. calculate PIC for cell
PIC$PIC <- PIC$TC-PIC$AC
PIC$PICpercell <- (PIC$PIC/PIC$Cellcount)*(10)^-3#in g
PIC$PICpercellpg <- PIC$PICpercell*1e12
ggplotly(ggplot(data=PIC, aes(x=Strain, y=PICpercellpg)) + geom_boxplot()+geom_point(size=2) +theme_Publication())
#3a. calculate density of cells (den_cell)
PIC <- mutate(PIC, group = ifelse(PICpercellpg < 4 , "naked", "calcified"))
ggplotly(ggplot(data=PIC, aes(x=group, y=PICpercellpg)) + geom_boxplot()+geom_point(size=2, aes(color=Strain))+ theme_Publication())
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
PIC <- mutate(PIC, rad = ifelse(group == "naked" , 1.8E-6, 2.3E-6)) #in m
PIC$volume <- (4/3)*pi*(PIC$rad*100)^3 #in cm3
PIC$Den_cell <- PIC$PICpercell/PIC$volume #g/cm3
PIC$Den_celltotal <- PIC$Den_cell+Den_OcM
ggplotly(ggplot(data=PIC, aes(x=Strain, y=Den_celltotal, color=group)) + geom_boxplot()+geom_point(size=2)
+theme_Publication())
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
#some strains that are "naked" have PIC<2. I chose to ignore this since in the lm model I do not use
#strain as a factor, rather data is treated as a whole (e.g., no grouping)
#3b. calculate PIC and density for lith
lithvol <- 3*1e-12 #in cm3, from CJ's paper
PIC$perlith <- PIC$PICpercell/20 #in g, assuming 20 liths attached
PIC$perlithpg <- PIC$perlith*1e12 #in pg
PIC$Denlith <- (PIC$perlith/lithvol) + Den_OcM #in g/cm3, with organic matter attached
PIC$Denlith_noOCM <- (PIC$perlith/lithvol) #in g/cm3, without organic matter attached
#assume grouping
PIC$group2 <- case_when(
PIC$PICpercellpg <2 ~ "naked",
PIC$PICpercellpg >2 & PIC$PICpercellpg < 4 ~ "naked/calcified uncertain",
PIC$PICpercellpg >4 & PIC$PICpercellpg < 10 ~ "moderately calcified",
PIC$PICpercellpg >10 ~ "heavily calcified",
TRUE ~ as.character(PIC$PICpercellpg)
)
#4. calculate sinking velocity of cells,liths, and viruses
PIC$SinkVel <- ((2*((PIC$rad*100)^2)*(981)*(PIC$Den_celltotal-Den_CH2O))/(9*(mu*10)))*864 #meter per day
PIC$SinkVel_lith <- ((2*((Reh_lith*100)^2)*(981)*(PIC$Denlith-Den_CH2O))/(9*(mu*10)))*864 #meter per day
Ehv_SinkVel <- ((2*((Rehv*100)^2)*(981)*(Den_CH2O-Den_CH2O))/(9*(mu*10)))*864 #equals to 0
#g is converted to per day, 864 is the one that converts cm/s to m/day
#plot sinking velocity vs calcification
ggplot(data=PIC, aes(x=PICpercellpg, y=SinkVel, color=Strain, shape=str_wrap(group2, 20))) + geom_point(size=5)+theme_Publication()+
labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC"~cell^-1)) +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())
ggplot(data=PIC, aes(x=perlithpg, y=SinkVel_lith, color=Strain, shape=str_wrap(group2, 20))) +
geom_point(size=5)+theme_Publication()+
labs(y = expression("Sinking velocity"~("m"~day^-1)), x = expression("PIC"~lith^-1)) +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())
#6. calculate encounters
PIC$E_DS <- (PIC$beta_DS*virnum) #E calculated with Virus for cell
PIC$E_DS_lith <- (PIC$beta_DS_lith*virnum) #E calculated with Virus for lith
ggplotly(ggplot(data=PIC, aes(x=PICpercellpg, y=log10(E_DS))) +geom_point(size=5, aes(shape=group2, color=Strain)) +
theme_Publication() + geom_smooth())
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
plotly.js does not (yet) support horizontal legend items
You can track progress here:
https://github.com/plotly/plotly.js/issues/53
#remove strains 621, 623, 625
PIC <- PIC %>% filter (! Strain %in% c("621", "623", "655"))
ggplot(data=PIC, aes(x=PICpercellpg, y=E_DS, color=Strain, shape=str_wrap(group2, 20))) + geom_point(size=5)+theme_Publication()+
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())
ggplot(data=PIC, aes(x=perlithpg, y=E_DS_lith, color=Strain, shape=str_wrap(group2, 20))) + geom_point(size=5)+theme_Publication()+
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=2),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
theme(legend.direction = "horizontal", legend.box = "vertical", legend.title = element_blank())
#7. summaries
summary_DS <- ddply(PIC, .(Strain), summarize, PICpercellpg=mean(PICpercellpg), perlithpg = mean(perlithpg),
Den_celltotal = mean (Den_celltotal),SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup <- ddply(PIC, .(group2), summarize, PICpercellpg=mean(PICpercellpg), perlithpg = mean(perlithpg),
Den_celltotal = mean (Den_celltotal), SinkVel=mean(SinkVel),beta_DS=mean(beta_DS), E_DS= mean(E_DS),
Denlith = mean (Denlith), SinkVel_lith=mean (SinkVel_lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS
summary_DS_bygroup
setwd("D:/R program")
The working directory was changed to D:/R program inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
require(openxlsx)
Loading required package: openxlsx
package 㤼㸱openxlsx㤼㸲 was built under R version 3.5.3
write.xlsx(summary_DS, file = "Postdoc-R/Exported Tables/summary_DS.xlsx")
Note: zip::zip() is deprecated, please use zip::zipr() instead
write.xlsx(summary_DS_bygroup, file = "Postdoc-R/Exported Tables/summary_DS_bygroup.xlsx")
#8. regressions
#8a. PIC and sinkvel of cells
PIC_reg <- lm(SinkVel~PICpercellpg, data=PIC)
summary(PIC_reg)
Call:
lm(formula = SinkVel ~ PICpercellpg, data = PIC)
Residuals:
Min 1Q Median 3Q Max
-0.0075101 -0.0021196 0.0001935 0.0015029 0.0106975
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0191654 0.0008908 21.51 <2e-16 ***
PICpercellpg 0.0176146 0.0001047 168.20 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.004047 on 35 degrees of freedom
Multiple R-squared: 0.9988, Adjusted R-squared: 0.9987
F-statistic: 2.829e+04 on 1 and 35 DF, p-value: < 2.2e-16
plot(residuals.lm(PIC_reg))
coef(PIC_reg)
(Intercept) PICpercellpg
0.01916536 0.01761463
cor(PIC$PICpercellpg, PIC$SinkVel)
[1] 0.999382
#8b. PIC and sinkvel of liths
PICSinkVel_lith_reg <- lm(SinkVel_lith~perlithpg, data = PIC)
summary(PICSinkVel_lith_reg)
essentially perfect fit: summary may be unreliable
Call:
lm(formula = SinkVel_lith ~ perlithpg, data = PIC)
Residuals:
Min 1Q Median 3Q Max
-3.110e-17 -1.378e-17 -1.778e-18 1.222e-17 3.317e-17
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.534e-03 3.662e-18 1.784e+15 <2e-16 ***
perlithpg 8.712e-02 8.611e-18 1.012e+16 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.664e-17 on 35 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 1.024e+32 on 1 and 35 DF, p-value: < 2.2e-16
plot(residuals.lm(PICSinkVel_lith_reg))
coef(PICSinkVel_lith_reg)
(Intercept) perlithpg
0.006534192 0.087122558
cor(PIC$perlithpg, PIC$SinkVel_lith)
[1] 1
#8c. for PIC of liths depending on PICs of cells
PIC_lith_reg <- lm (perlithpg~PICpercellpg, data=PIC)
summary(PIC_lith_reg)
essentially perfect fit: summary may be unreliable
Call:
lm(formula = perlithpg ~ PICpercellpg, data = PIC)
Residuals:
Min 1Q Median 3Q Max
-4.012e-16 -1.880e-17 9.870e-18 2.897e-17 1.908e-16
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.650e-17 1.902e-17 -1.919e+00 0.0632 .
PICpercellpg 5.000e-02 2.236e-18 2.236e+16 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8.641e-17 on 35 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 4.999e+32 on 1 and 35 DF, p-value: < 2.2e-16
plot(residuals.lm(PIC_lith_reg))
coef(PIC_lith_reg)
(Intercept) PICpercellpg
-3.650391e-17 5.000000e-02
cor(PIC$perlithpg, PIC$SinkVel_lith)
[1] 1
#8d. for sinking velocity of liths depending on PICs of cells
SinkVel_lith_reg <- lm (SinkVel_lith~PICpercellpg, data=PIC)
summary(SinkVel_lith_reg)
essentially perfect fit: summary may be unreliable
Call:
lm(formula = SinkVel_lith ~ PICpercellpg, data = PIC)
Residuals:
Min 1Q Median 3Q Max
-5.558e-17 -1.279e-17 4.460e-19 1.647e-17 2.985e-17
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.534e-03 4.341e-18 1.505e+15 <2e-16 ***
PICpercellpg 4.356e-03 5.103e-19 8.536e+15 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.972e-17 on 35 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 7.286e+31 on 1 and 35 DF, p-value: < 2.2e-16
plot(residuals.lm(SinkVel_lith_reg))
coef(SinkVel_lith_reg)
(Intercept) PICpercellpg
0.006534192 0.004356128
cor(PIC$perlithpg, PIC$SinkVel_lith)
[1] 1
#
#10. calculate new beta and encounters based on newdata
PIC_newdata <- mutate(PIC_newdata, rad = ifelse(group == "naked" , 1.8E-6, 2.3E-6)) #in m
PIC_newdata$beta_DS <- (pi*(((PIC_newdata$rad+Rehv)*100)^2)*(abs((PIC_newdata$SinkVel.pred-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day
PIC_newdata$beta_DS_lith <- (pi*(((Reh_lith+Rehv)*100)^2)*(abs((PIC_newdata$SinkVel.pred.lith-Ehv_SinkVel)/864)))*86400 #in encounters cm3/day
PIC_newdata$E_DS <- (PIC_newdata$beta_DS*virnum) #E calculated with Virus for cell
PIC_newdata$E_DS_lith <- (PIC_newdata$beta_DS_lith*virnum) #E calculated with Virus for cell
ggplot(data=PIC_newdata, aes(x=PICpercellpg, y=E_DS)) +geom_point(size=5, aes(color=group2)) +
theme_Publication() + geom_smooth() +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=4),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC"~cell^-1)) +
theme(legend.title = element_blank())
summary_DS_bygroup_pred <- ddply(PIC_newdata, .(group2), summarize, PICpercellpg=mean(PICpercellpg),
perlithpg.pred = mean(perlithpg.pred), SinkVel.pred=mean(SinkVel.pred),beta_DS=mean(beta_DS),
E_DS= mean(E_DS), SinkVel.pred.lith=mean (SinkVel.pred.lith),
beta_DS_lith=mean (beta_DS_lith), E_DS_lith=mean (E_DS_lith))
summary_DS_bygroup_pred
#remove naked/calcified uncertain
PIC_newdata.trim <- PIC_newdata %>% filter (! group2 %in% c("naked/calcified uncertain"))
ggplot(data=PIC_newdata.trim, aes(x=PICpercellpg, y=E_DS)) +geom_point(size=5, aes(color=group2)) +
theme_Publication() + geom_smooth() +
scale_y_log10(
breaks = scales::trans_breaks("log10", function(x) 10^x, n=4),
labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides="l") +
labs(y = expression("viral encounters " ~ day^-1~cell^-1), x = expression("PIC"~cell^-1)) +
theme(legend.title = element_blank())
setwd("D:/R program")
The working directory was changed to D:/R program inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
write.xlsx(summary_DS_bygroup_pred, file = "Postdoc-R/Exported Tables/summary_DS_bygroup_pred.xlsx")
cannot create file 'Postdoc-R/Exported Tables/summary_DS_bygroup_pred.xlsx', reason 'Permission denied'
Turbulence
#Turbulence
#1. make new dataframe